{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f25c1140-47c3-45cb-9ffe-741c7ad23494",
   "metadata": {},
   "outputs": [],
   "source": [
    "from langchain_community.vectorstores import FAISS\n",
    "from langchain_community.document_loaders import WebBaseLoader\n",
    "from langchain_core.output_parsers import StrOutputParser\n",
    "from langchain_core.runnables import RunnableParallel, RunnablePassthrough\n",
    "from langchain_huggingface import HuggingFaceEmbeddings\n",
    "from langchain_openai import ChatOpenAI\n",
    "\n",
    "from langchain.prompts import ChatPromptTemplate\n",
    "from langchain.text_splitter import RecursiveCharacterTextSplitter\n",
    "\n",
    "from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, pipeline\n",
    "\n",
    "import bs4\n",
    "import getpass\n",
    "import os"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "ac6bb745-1026-462f-8c85-830688e0b950",
   "metadata": {},
   "outputs": [
    {
     "name": "stdin",
     "output_type": "stream",
     "text": [
      " ········\n"
     ]
    }
   ],
   "source": [
    "os.environ[\"OPENAI_API_KEY\"] = getpass.getpass()\n",
    "llm = ChatOpenAI(model=\"gpt-4o-mini\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c4fae097-7bcc-4d4e-ae68-430677547d08",
   "metadata": {},
   "outputs": [],
   "source": [
    "loader = WebBaseLoader([\"https://en.wikipedia.org/wiki/2024_Summer_Olympics\"])\n",
    "documents = loader.load()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "a7a59974-c5b9-4d77-ba1b-12a5ae9b0dc0",
   "metadata": {},
   "outputs": [],
   "source": [
    "text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)\n",
    "all_splits = text_splitter.split_documents(documents)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "08d57c20-37a1-41cf-ba6e-cff9e41db709",
   "metadata": {},
   "outputs": [],
   "source": [
    "vectorstore = FAISS.from_documents(all_splits, HuggingFaceEmbeddings(model_name=\"sentence-transformers/all-mpnet-base-v2\"))\n",
    "retriever = vectorstore.as_retriever()\n",
    "\n",
    "def format_docs(docs):\n",
    "    return \"\\n\\n\".join(doc.page_content for doc in docs)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "9835b6ae-11a9-4b7e-bdb1-1171e4a32e88",
   "metadata": {},
   "outputs": [],
   "source": [
    "template = \"\"\"Answer the question based only on the following context:\n",
    "                {context}\n",
    "\n",
    "                Question: {question}\n",
    "                \"\"\"\n",
    "prompt = ChatPromptTemplate.from_template(template)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "39615af6-e0ba-43a2-813c-214467656ab8",
   "metadata": {},
   "outputs": [],
   "source": [
    "rag_chain = (\n",
    "    {\"context\": retriever | format_docs, \"question\": RunnablePassthrough()}\n",
    "    | prompt\n",
    "    | llm\n",
    "    | StrOutputParser()\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "id": "fa1bf73b-3263-4380-8a17-d3dd5ef19f2c",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The 2024 Summer Olympics are being held in Paris, France, with events also taking place in 16 additional cities across Metropolitan France and one subsite in Tahiti, French Polynesia.\n"
     ]
    }
   ],
   "source": [
    "response = rag_chain.invoke(\"Where are the 2024 summer olympics being held?\")\n",
    "print(response)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "f10bae90-b91a-456c-ba88-1ccd122b6fb8",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "The new sport being added for the 2024 Summer Olympics is breaking, which is making its Olympic debut. Additionally, skateboarding, sport climbing, and surfing are returning to the programme after debuting at the 2020 Summer Olympics.\n"
     ]
    }
   ],
   "source": [
    "result = rag_chain.invoke(\"What are the new sports that are being added for the 2024 summer olympics?\")\n",
    "print(result)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "5bf72e11-af76-4377-b4fd-7fa0825a89da",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "There are expected to be 45,000 volunteers recruited for the 2024 Summer Olympics.\n"
     ]
    }
   ],
   "source": [
    "result = rag_chain.invoke(\"How many volunteers are supposed to be present for the 2024 summer olympics?\")\n",
    "print(result)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "chapter10",
   "language": "python",
   "name": "chapter10"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.12"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
